*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program identifies screened NYC high schools.
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
args bw
*   ----------------------------------------------------------------------------
*   bw: positional argument; it is spliced into the file name of the
*   general-risk pscore dataset that is loaded further down in the loop.
*   ----------------------------------------------------------------------------

foreach year in 2012 2013 2014 {

	* MDRD1 = pscores computed without bandwidth risk.
	* Read only the merge keys (stu, prg_bkt) and the two scores used later.
	use stu prg_bkt pscore_form pscore_freq ///
		using "${cleandata}program_pscore_nobw_`year'.dta", clear

	* Tag both scores with a _no_bw suffix so they do not clash with the
	* like-named scores in the general-risk file they get merged into.
	rename (pscore_form pscore_freq) (pscore_form_no_bw pscore_freq_no_bw)

	* Park the result in a tempfile for the 1:1 merge below
	tempfile nobw
	save "`nobw'"

	* Load general risk pscores (MDRD2)
	use "${cleandata}program_pscore`bw'_`year'.dta", clear

	* Check if we assigned same program as in the match
	gen same_prog = prg_offered  == stripp

	* Same for Ed.Opt: append the Ed.Opt suffix to the offered program (when
	* there is one) before comparing it against ourmatch
	gen prog_offered_aug  =  prg_offered
	replace prog_offered_aug  =  prg_offered +"_"+ edopt_offered_suffix if  edopt_offered_suffix != ""
	gen same_aug_prog = prog_offered_aug == ourmatch

	* Generate indicator for missing lotto
	gen missing_lotto = lottery_rank   == .

	* Build a program -> school DBN crosswalk from the final-disposition
	* columns. It is saved as a permanent file in the clean-data folder (not
	* a tempfile) because it is merged back in right below and is read again
	* later in this loop when the listed choices are built.
	preserve
		keep final_disposition final_disposition_dbn
		duplicates drop
		ren final_disposition prg
		ren final_disposition_dbn dbn //check if this is supposed to be sch
		sa "${cleandata}/app_school_dbns_`year'.dta", replace
	restore

	* Attach the DBN of the program applied to; keep(1 3) keeps every
	* application row, and dbn_merge records whether a DBN was found.
	merge m:1 prg using "${cleandata}/app_school_dbns_`year'.dta", keep(1 3)  gen(dbn_merge)

	ren dbn app_school_dbn

	* Generate Ed.Opts. indicator
	gen edopts = edopt_r == 1 |  edopt_s == 1

	* Merge MDRD1 pscore
	merge 1:1 stu prg_bkt using "`nobw'", nogen

	* ----------------------------------------------------------------------------
	* Collapsing ed opt buckets to the same program
	* ----------------------------------------------------------------------------

	* Generate simplified school ID: the 4 characters of the DBN starting at
	* position 3
	gen app_dbn_last_4 = substr(app_school_dbn, 3, 4)

	* Generate indicator for screened school using lottery, excluding Ed.Opts.
	gen screened_with_lottery_noedopts = screened_uses_lottery == 1 & description != "Ed. opt."

	* Generate offer indicator at the student-program level (max over buckets)
	bys stu prg : egen offer_prg = max( offer_mod)

	* Merge in our simulated offers
	merge m:1 stu using "${cleandata}best_guess_simulated_match_`year'.dta", nogen keep(1 3) keepusing(ourmatch_noedops)

	* Generate offer variable for our match
	gen sim_offer_prg = prg == ourmatch_noedops

	* Aggregate pscores for Ed.Opts. buckets by program
	* (generate program-level pscore).
	* NOTE(review): egen total() counts missing scores as 0, so a program whose
	* buckets all have a missing score gets 0 rather than missing - confirm
	* that this is intended.
	foreach score in pscore_form pscore_form_bw pscore_form_no_bw ///
		pscore_freq pscore_freq_no_bw pscore_qbw {
		bys stu prg : egen double `score'_prog = total( `score' )
	}

	* Generate has_bw flag at the program level (rather than bucket level)
	* mostly to collapse has_bw status for EdOpts
	bys stu prg: egen has_bw_prg = max(has_bw)

	* Reduce to student-program level. description is spelled out in full
	* (instead of an abbreviation) so the keep does not depend on variable
	* abbreviation being enabled and unambiguous; the variable is needed
	* again when programs are classified further down.
	keep stu prg description choice offer_prg sim_offer_prg ///
		app_school_dbn app_dbn_last_4 pscore_*_prog ///
		missing_lotto same_prog same_aug_prog ///
		screened_with_lottery_noedopts has_bw_prg

	* Bucket-level rows are now identical within student-program
	duplicates drop
	isid stu prg

	tempfile original
	sa "`original'"

	* ----------------------------------------------------------------------------
	* II Creating match-based dummies for instruments and pscores
	* ----------------------------------------------------------------------------

		* 	1) Create file with choices (this keeps ineligible apps)
			* This is used mainly for the endogenous variable. If an applicant
			* enrolls in their 1st/2nd/3rd choice, which is ineligible, they
			* can't get an offer in the match but still enroll at such a school.
			use "${cleandata}nyc_match_reshape`year'.dta", clear

			* Attach the school DBN of each listed program (crosswalk saved
			* earlier in this loop iteration)
			merge m:1 prg using "${cleandata}/app_school_dbns_`year'.dta", keep(1 3)

			gen app_dbn_last_4 = substr(dbn, 3, 4)

			keep stu choice app_dbn_last_4 prg

			* For every rank that appears in choice, attach to each row of a
			* student the school ID and the program that student listed at
			* that rank.
			* We use the LISTED choice. This is different from the choice
			* in the match file because we dropped ineligible apps.
			levelsof choice, local(lchoices)
			foreach choice of local lchoices {

				preserve
					keep if choice == `choice'
					keep stu app_dbn_last_4  prg
					ren app_dbn_last_4 listed_`choice'_last_4
					ren prg prog_`choice'
					duplicates drop
					tempfile choicedbn
					sa "`choicedbn'", replace
				restore

				* m:1 requires a single listed program per student and rank
				merge m:1 stu using "`choicedbn'", nogen
			}

			* One row per student: school IDs for all listed ranks, programs
			* for the first three
			keep stu listed_*_last_4 prog_1 prog_2 prog_3
			duplicates drop
			tempfile full_choices
			sa "`full_choices'"

			* Merge choices back in to the original sample, only keeping applications
			* from the original sample
			use "`original'", clear

			merge m:1 stu using "`full_choices'", gen(choice_merge) keep( 1 3)

		* 	2) Generate indicator for top 3 choices based on realized match
			* (after dropping ineligible applications)
			gen list_3_choice  = ( choice <= 3 )

		* 	3) Choice 1 / 2-12 , 1-3 / 4-12
			gen list_1 = ( inlist(choice,1) )
			gen list_2_12 = ( inrange(choice,2,12) )

			gen list_1_3 = ( inrange(choice,1,3) )
			gen list_4_12 = ( inrange(choice,4,12) )

		* 	4) Top 3 choices based on actual ranking (keeping ineligible)
			gen top_3_school = inlist(app_dbn_last_4, listed_1_last_4, listed_2_last_4, listed_3_last_4 )

		* 	5) Identify programs that screen applicants
			gen screened_prog = inlist(description,"Screened", "Audition", "Screened For Language")
			gen unscreened_prog = inlist(description , "Unscreened" ,"Zoned" , "Limited Unscreened")
			gen edopt_prog =  inlist(description , "Ed. opt.")

			* Identify strictly screened programs: labeled as screened program
			* and do not use lottery
			gen strict_scr = screened_prog == 1 &   screened_with_lottery_noedopts == 0

			* Identify unscreened or screened programs that do use lottery
			gen strict_unscr = unscreened_prog == 1 |  screened_with_lottery_noedopts == 1

			* Share of applications per school (app_school_dbn) that are for
			* screened, unscreened and edopt seats. The three flags are 0/1
			* and never missing, so each mean is already constant and
			* non-missing within school; no within-school fill is needed.
			bysort app_school_dbn: egen mean_scr = mean(screened_prog )
			bysort app_school_dbn: egen mean_unscr = mean(unscreened_prog)
			bysort app_school_dbn: egen mean_edopt = mean(edopt_prog)

			* Share of applications per school (app_dbn_last_4) that are for
			* strictly screened / strictly unscreened seats
			bys app_dbn_last_4: egen mean_strict_scr =  mean(strict_scr )
			bys app_dbn_last_4: egen mean_strict_unscr =  mean(strict_unscr )

			* Check that the three shares add up to 1, i.e. every program
			* falls in exactly one of the three description groups. The shares
			* are stored as floats, so the sum can land marginally on either
			* side of 1; allow the same tolerance in both directions.
			gen sum_test =  mean_scr + mean_unscr + mean_edopt
			su sum_test
			assert abs(r(min) - 1) < 0.001 & abs(r(max) - 1) < 0.001
			drop sum_test

			/* Several alternative screened / unscreened / edopt classifications,
			all built from the school-level application shares.

			Version 1: any screened or edopt share makes the school screened;
			unscreened requires a 100% unscreened share. */

			generate scr1   = (mean_scr > 0) | (mean_edopt > 0)
			generate unscr1 = (mean_unscr == 1)

			/* Version 2: pure-edopt schools (edopt share of exactly 1) become
			their own category; any screened share, or an edopt share strictly
			between 0 and 1, counts as screened. */

			generate scr2   = (mean_scr > 0) | ((mean_edopt > 0) & (mean_edopt != 1))
			generate unscr2 = (mean_unscr == 1)
			generate edopt2 = (mean_edopt == 1)

			/* Version 3: screened only when every application at the school is
			strictly screened and the program has has_bw_prg equal to 1.
			Everything else is unscreened: pure unscreened, pure edopt, mixed
			schools (none of the three shares equals 1), or has_bw_prg not 1. */

			generate scr3   = (mean_strict_scr == 1) & (has_bw_prg == 1)
			generate unscr3 = (mean_unscr == 1) | (mean_edopt == 1) | ///
				((mean_strict_scr != 1) & (mean_unscr != 1) & (mean_edopt != 1)) | ///
				(has_bw_prg != 1)

			* Every row must be in exactly one version-3 sector
			egen n_sectors = rowtotal(scr3 unscr3)
			summarize n_sectors
			assert r(min) == 1 & r(max) == 1
			drop n_sectors

			/* Version 4: as version 3 without the has_bw_prg condition, and
			with pure-edopt schools split out; mixed schools are unscreened. */

			generate scr4    = (mean_strict_scr == 1)
			generate unscr4  = (mean_unscr == 1) | ///
				((mean_strict_scr != 1) & (mean_unscr != 1) & (mean_edopt != 1))
			generate edopts4 = (mean_edopt == 1)

			* Every row must be in exactly one version-4 sector
			egen n_sectors = rowtotal(scr4 unscr4 edopts4)
			summarize n_sectors
			assert r(min) == 1 & r(max) == 1
			drop n_sectors

			* Flag schools with any edopt share at all
			generate edopt_v2 = (mean_edopt > 0)


	* ----------------------------------------------------------------------------
	* III Creating and merge our own dummies for instruments and pscores
	* ----------------------------------------------------------------------------

			* Generate school-level scr3 and unscr3 definitions: a school is
			* scr3 if any of its rows is scr3, and unscr3 is the complement
			bys app_dbn_last_4 : egen scr3_temp = max(scr3)
			assert inlist(scr3_temp, 0, 1)
			gen unscr3_temp = 1 - scr3_temp

			* Test that sum is 1 (by construction, based on scr3 definition above)
			egen test = rowtotal(scr3_temp unscr3_temp )
			su test
			assert `r(min)' == 1  & `r(max)' == 1
			drop test

			* Replace scr3 and unscr3 with school-level variables for this dataset
			drop scr3 unscr3
			ren (scr3_temp unscr3_temp) (scr3 unscr3)

		* Save screened DBNs
			* NOTE(review): a program-level charter flag (first character of
			* prg equal to "A") used to be generated here, but it was never in
			* the keep list below and so never reached the saved file. If a
			* charter flag is wanted in the output, it has to be defined at
			* the school level and added to the keep list.

			keep app_dbn_last_4 edopt_v2 scr1 unscr1 scr2 unscr2 edopt2 scr3 unscr3 scr4 unscr4 edopts4

			* One row per school; isid stops the run if any classification
			* varies within a school
			duplicates drop
			isid app_dbn_last_4 , missok
			save "${builddata}screened_dbns_`year'.dta", replace

}
